//	AUTHOR:			ENRICO CANTONI
//	CREATED:		FEBRUARY 19, 2020
//	MODIFIED: 		JUNE 20, 2022
//	DESCRIPTION:	Makes un-tabulated descriptive statistics.


// Set Stata version: every command below is interpreted under version 16.1
// rules, also when the file is run on a newer release
version 16.1

********************************************************************************
********************************************************************************

*** COHABITATION BEFORE MARRIAGE ***

// Load the observations flagged by s_all. The clear option allows the load
// even when unsaved data are in memory, matching the other "use ..., clear"
// calls in this file.
use ${analysis_clean} if s_all, clear

// Within each household-year, find the voter ID of the household head and of
// the spouse/partner. Each comparison evaluates to 0/1, so the product is
// ID_voter for the member holding that position and 0 for everybody else; the
// household-year max then picks up that member's ID and is 0 when nobody
// holds the position (this relies on IDs being positive -- TODO confirm).
#delimit ;
gegen ID_head = max(ID_voter *
	(hh_position == "Head of household":hh_position)),
	by(ID_hh year)
;
gegen ID_spouse = max(ID_voter * (hh_position == "Spouse":hh_position)),
	by(ID_hh year)
;
gegen temp = max(ID_voter * (hh_position == "Cohabiting":hh_position)),
	by(ID_hh year)
;
#delimit cr

// Fall back on the cohabiting partner's ID when no spouse was found, then
// turn the remaining 0 placeholders into missing values
replace ID_spouse = temp if inlist(ID_spouse, 0, .) & !inlist(temp, 0, .)
mvdecode ID_head ID_spouse, mv(0)

// Count observations that are unmarried today and married in the voter's next
// observation. The _n != _N term excludes each voter's last observation, whose
// lead is missing and would otherwise count as true.
by ID_voter (year): gen futuremarried = married[_n+1] & !married & _n != _N
tab futuremarried if futuremarried
local nfuturemarried = r(N)

// Among those, count the observations whose household has the same
// head/spouse pair (in either order) in this and in the next observation,
// with a non-missing spouse ID in both
#delimit ;
by ID_voter (year): gen flag = futuremarried &
	(((ID_head == ID_head[_n+1]) &
		(ID_spouse == ID_spouse[_n+1])) |
	((ID_head == ID_spouse[_n+1]) &
		(ID_spouse == ID_head[_n+1]))) &
	!missing(ID_spouse) & !missing(ID_spouse[_n+1])
;
#delimit cr
tab flag if flag
local ncohabiting = r(N)
drop futuremarried

// Report both counts and their ratio
di in yellow "N unmarried obs that will be married tomorrow = `nfuturemarried'"
di in yellow "Of which are currently cohabiting = `ncohabiting'"
di in yellow "Share = `=`ncohabiting'/`nfuturemarried''"

// Marital status at the end of each election pair (2004 to 2008, then 2009 to
// 2013) among voters who were unmarried at the start of the pair and whose
// household shows the same head/spouse pair, in either order, at both ends.
// The spouse ID must be non-missing in both elections, and the two elections
// must be consecutive observations of the voter.
local start_years 2004 2009
local end_years 2008 2013
forvalues k = 1/2 {
	local y0 : word `k' of `start_years'
	local y1 : word `k' of `end_years'

	by ID_voter (year): gen flag_`y0'to`y1' = ///
		year == `y1' & year[_n-1] == `y0' & ///
		!married[_n-1] & ///
		!missing(ID_spouse[_n-1]) & !missing(ID_spouse) & ///
		( ///
			((ID_head[_n-1] == ID_head) & (ID_spouse[_n-1] == ID_spouse)) | ///
			((ID_head[_n-1] == ID_spouse) & (ID_spouse[_n-1] == ID_head)) ///
		)
	tab maritalstatus if flag_`y0'to`y1'
}

// Check household income for people who transition into marriage, split by
// whether they hold the "Child" household position (i.e., are "dependents" of
// the head of the household) or any other position.
// The panel is declared on a within-voter observation counter, so L. and F.
// refer to the voter's previous and next observation whatever the gap in
// calendar years.
by ID_voter (year): gen counter = _n
xtset ID_voter counter

// First married observation, by position held in the previous observation.
// !L.married is 0 when the lag is missing, so first observations drop out.
sum hh_income_mod if married & !L.married & L.hh_position == "Child":hh_position
sum hh_income_mod if married & !L.married & L.hh_position != "Child":hh_position

// Last unmarried observation, by current position. F.married is missing at a
// voter's last observation and a missing value counts as true in a logical
// expression, so the lead is explicitly required to be non-missing.
#delimit ;
sum hh_income_mod if !married & F.married & !missing(F.married) &
	hh_position == "Child":hh_position
;
sum hh_income_mod if !married & F.married & !missing(F.married) &
	hh_position != "Child":hh_position
;
#delimit cr
clear

********************************************************************************
********************************************************************************

*** SHARE OF KIDS AGED 0-5 W/ UNMARRIED VS. SOUTH-BORN PARENTS ***

// Load the observations flagged by s_kids
use ${analysis_clean} if s_kids, clear

// South-born couples: the voter only ever appears in households whose
// records all have birth_region between 9 and 20, and the voter is recorded
// with a marital status other than "Single" at least once
gegen tmp_hh_south = min(inrange(birth_region, 9, 20)), by(ID_hh)
gegen tmp_voter_south = min(tmp_hh_south), by(ID_voter)
gegen tmp_ever_nonsingle = ///
	max(maritalstatus != "Single":maritalstatus), by(ID_voter)
gen flag_south = tmp_voter_south & tmp_ever_nonsingle

// Never-married voters: recorded as "Single" in every observation
gegen flag_nevermarried = ///
	min(maritalstatus == "Single":maritalstatus), by(ID_voter)

// Restrict to observations with 1+ kids aged 0 to 5
keep if nkids_0to5 > 0

// One record per household-year: the first one when sorted by hh_position
bysort ID_hh year (hh_position): keep if _n == 1

// Replicate each record once per child aged 0-5, so that the tabulations
// below count children rather than households
expand nkids_0to5

// Number of kids by year in each category
tab flag_south year
tab flag_nevermarried year
clear

********************************************************************************
********************************************************************************

*** BOLOGNA: TRENDS IN MARRIAGE, COHABITATION, AND CHILDREN OUTSIDE MARRIAGE ***

// Load the observations flagged by s_all
use ${analysis_clean} if s_all

// Number of members recorded as "Cohabiting" in each household-year
gegen tmp_ncohab = total(hh_position == "Cohabiting":hh_position), ///
	by(ID_hh year)

// Test the first two members of each household-year, sorted by hh_position
bysort ID_hh year (hh_position): gen tmp_pairok = ///
	!married[1] & !married[2] & /// Neither is married
	hh_position[2] == "Cohabiting":hh_position & /// Second one is cohabiting
	female[1] != female[2] // The two are of different sexes

// A voter is cohabiting when the household-year has exactly one cohabiting
// member, passes the test above, and the voter is the head or that member
gen cohabiting = tmp_ncohab == 1 & tmp_pairok & ///
	inlist(hh_position, ///
		"Head of household":hh_position, ///
		"Cohabiting":hh_position)
drop tmp_ncohab tmp_pairok

// Recode cohabiting voters to a new marital-status category (code 5) and
// tabulate marital status by year with column percentages
replace maritalstatus = 5 if cohabiting
label define maritalstatus 5 "Cohabiting", add
tab maritalstatus year, col

// Same tabulation for voters aged 50 or younger
tab maritalstatus year if age <= 50, col

// Same tabulation within the s_kids sample, for voters with kids aged 0 to 5
tab maritalstatus year if s_kids & anykids_0to5, col

// Same tabulation within the s_kids sample, for voters with kids aged 0 to 17
// (row-wise max of the three age-band indicators)
egen anykids_0to17 = rowmax(anykids_0to5 anykids_6to11 anykids_12to17)
tab maritalstatus year if s_kids & anykids_0to17, col
clear

********************************************************************************
********************************************************************************

*** MARITAL STATUS FOR FIRST-TIME PARENTS ***

// Load the observations flagged by s_kids
use ${analysis_clean} if s_kids

// t0 = position, within the voter's observations, of the observation where
// nkids_0to18 moves from 0 to a positive number. It is first set on that
// observation only and then copied to all observations of the voter; when a
// voter has several such switches, the max keeps the latest one.
by ID_voter (year): gen t0 = _n if ///
	_n != 1 & nkids_0to18[_n-1] == 0 & nkids_0to18 > 0
gegen tmp_t0 = max(t0), by(ID_voter)
replace t0 = tmp_t0 if !missing(tmp_t0)

// t = number of observations since the switch (0 at the switch itself)
by ID_voter (year): gen t = _n - t0
drop tmp_t0

// Marital status at the switch and in the two following observations
forvalues k = 0/2 {
	tab maritalstatus if t == `k'
}

// Same tabulations for voters whose largest value of t is exactly 2
gegen tmp_lastt = max(t), by(ID_voter)
forvalues k = 0/2 {
	tab maritalstatus if t == `k' & tmp_lastt == 2
}
clear

********************************************************************************
********************************************************************************

*** COUNTS AND AGE OF KIDS ***

// Load data (full file, no sample restriction)
use ${analysis_clean}

// Number of kids (total, 0-18, and older than 18) for voters in the s_kids
// sample aged 50 or younger: first over everybody, then in detail for
// voters with at least one kid
gen nkids_tot = nkids_0to18 + nkids_18more
sum nkids_tot nkids_0to18 nkids_18more if s_kids & age <= 50
#delimit ;
sum nkids_tot nkids_0to18 nkids_18more if s_kids & age <= 50 &
	nkids_tot >= 1, detail
;
#delimit cr

// Age difference between siblings: keep records in the "Child" position from
// household-years where nkids_0to18 reaches 2 or more, sort them by age within
// household-year, and take the gap to the next-younger sibling.
// NOTE(review): agediff is defined for every consecutive pair of siblings,
// not only for the first- and second-born -- confirm which one is wanted.
gegen temp = max(nkids_0to18), by(ID_hh year)
keep if temp >= 2 & hh_position == "Child":hh_position
bysort ID_hh year (age): gen agediff = age - age[_n-1] if _n != 1

// Report the distribution; the variable was previously created and discarded
// by the clear below without ever being displayed
sum agediff, detail
clear

********************************************************************************
********************************************************************************

*** CORRELATION BETWEEN OWN AND SPOUSAL TURNOUT ***

// Pairwise correlation between own and spousal turnout for voters who switch
// from being single to married and whose spouse is in the dataset over the
// same elections

// Outcome variable
local Y voted

// Right-hand-side terms of the residualizing regression further below
local covs ///
	year##nbhd ///
	precinct_age ///
	precinct_citizen ///
	precinct_female ///
	precinct_hh_income_mod ///
	hh_citizen ///
	hh_income_mod ///
	income ///
	irpef ///
	female##(anykids_0to5 ///
			 anykids_6to11 ///
			 anykids_12to17 ///
			 anykids_18more)

// Load the observations flagged by s_all
use ${analysis_clean} if s_all, clear

// Keep voters observed married at least once and single (not married, not
// divorced, not widowed) at least once, and never divorced or widowed
gegen tmp_wed = max(married), by(ID_voter)
gegen tmp_single = max(!married & !divorced & !widowed), by(ID_voter)
gegen tmp_other = max(divorced | widowed), by(ID_voter)
keep if tmp_wed & tmp_single & !tmp_other
drop tmp_wed tmp_single tmp_other

// Keep voters whose hh_position code in their last observation is 1 or 2
// (heads of households or spouses), and record the year of that observation
by ID_voter (year): keep if inlist(hh_position[_N], 1, 2)
by ID_voter (year): gen endline_voter = year[_N]

// Drop voters who, in their last election, are the only remaining member of
// their household. Done in two passes, presumably because a drop in the first
// pass can leave another voter alone.
forvalues pass = 1/2 {
	gegen tmp_nmembers = nvals(ID_voter), by(ID_hh year)
	gegen tmp_alone = ///
		max((year == endline_voter) & (tmp_nmembers == 1)), by(ID_voter)
	drop if tmp_alone
	drop tmp_nmembers tmp_alone
}

// Spouse ID at endline: within the household-year sorted by hh_position, the
// first member gets the ID of the second one and vice versa
bysort ID_hh year (hh_position): gen long ID_spouse = ID_voter[1] ///
	if (_n == 2) & (year == endline_voter)
by ID_hh year (hh_position): replace ID_spouse = ID_voter[2] ///
	if (_n == 1) & (year == endline_voter)
assert !missing(ID_spouse) if year == endline_voter

// Copy the endline spouse ID to the voter's other observations
gegen tmp_spouse = min(ID_spouse), by(ID_voter)
replace ID_spouse = tmp_spouse if missing(ID_spouse)
drop tmp_spouse

// A voter must not be matched to 2+ different spouses
gegen tmp_nspouses = nvals(ID_spouse), by(ID_voter)
drop if tmp_nspouses > 1
drop tmp_nspouses

// A spouse must not be matched to 2+ different voters
gegen tmp_nvoters = nvals(ID_voter), by(ID_spouse)
gegen tmp_maxvoters = max(tmp_nvoters), by(ID_voter)
drop if tmp_maxvoters > 1
drop tmp_maxvoters tmp_nvoters

// Repeat the endline household check after the drops above
forvalues pass = 1/2 {
	gegen tmp_nmembers = nvals(ID_voter), by(ID_hh year)
	gegen tmp_alone = ///
		max((year == endline_voter) & (tmp_nmembers == 1)), by(ID_voter)
	drop if tmp_alone
	drop tmp_nmembers tmp_alone
}

// Save the voter-side sample to a temporary file
tempfile temp
save `temp'

// Build the spouse side: keep each voter's ID, household, outcome and sex,
// rename them as spouse variables, and attach them to the voter-side rows
// that list this person as ID_spouse in the same year. Only matched rows are
// kept. The outcome is referenced through local Y everywhere in this block,
// so changing Y does not leave stale hard-coded names behind.
keep ID_voter ID_hh `Y' year female
rename (ID_voter ID_hh `Y' female) ///
	(ID_spouse ID_hh_spouse `Y'_spouse female_spouse)
merge 1:1 ID_spouse year using `temp', keep(match) nogen

// Keep only couples of different genders
keep if female != female_spouse

// Check cohabitation in last pre-marriage election.
// The table below is output recorded by the original author, before the
// _n != _N guard was added; counts may differ if any voter's last observation
// is unmarried.
/*  cohabiting |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |      3,561       57.11       57.11
          1 |      2,674       42.89      100.00
------------+-----------------------------------
      Total |      6,235      100.00
*/
// The lead of married is missing at a voter's last observation and a missing
// value counts as true, hence the explicit _n != _N guard
bysort ID_voter (year): gen future_married = ///
	(!married & married[_n+1] & _n != _N)
gen cohabiting = (ID_hh == ID_hh_spouse)
tab cohabiting if future_married

// Resave temp file
save `temp', replace

// Get residual turnout in full sample
use ${analysis_clean} if s_all, clear
reghdfe `Y' `covs' maritalstatus##female, ///
	absorb(ID_voter female##age) residuals(`Y'_res)
keep ID_voter year `Y'_res

// Attach own residuals to the couple file (inside preserve/restore so that
// the full-sample residuals stay in memory) ...
preserve
merge 1:1 ID_voter year using `temp', assert(master match) keep(match) nogen
save `temp', replace
restore

// ... then attach the same residuals again under the spouse's ID
rename (ID_voter `Y'_res) (ID_spouse `Y'_spouse_res)
merge 1:1 ID_spouse year using `temp', assert(master match) keep(match) nogen

// Correlation between own and spouse's residualized turnout: unmarried,
// married, and unmarried living in the same household
corr `Y'_res `Y'_spouse_res if !married
corr `Y'_res `Y'_spouse_res if married
corr `Y'_res `Y'_spouse_res if !married & (ID_hh == ID_hh_spouse)

// Correlation between own and spouse's raw turnout, same three groups
corr `Y' `Y'_spouse if !married
corr `Y' `Y'_spouse if married
corr `Y' `Y'_spouse if !married & (ID_hh == ID_hh_spouse)
clear

********************************************************************************
********************************************************************************

*** FEMALE LFP IN ITALIAN PROVINCES ***

// Read the pipe-delimited ISTAT file, taking variable names from its first row
import_delimited ${data_raw_ISTAT}/lfp_byprovinceyear_2004to2019.csv, ///
	clear delimit("|") varnames(1)

// Keep rows whose territory code ends in two digits (the provinces; 103
// different ones according to the original note) and count the distinct codes
keep if regexm(ïitter107, "([0-9][0-9]$)")
gdistinct ïitter107

// Keep relevant vars
keep territorio sesso time value cittadinanza classedietã titolodistudio

// Keep the sex-specific rows for all citizenships, ages 15-64, and all
// education levels
keep if cittadinanza == "TOTAL" & ///
	classedietã == "15-64 anni" & ///
	sesso != "totale" & ///
	titolodistudio == "totale"

// Gap in LFP within territory and period: second minus first row after
// sorting on sesso. This is men minus women provided the male category sorts
// after "femmine" -- TODO confirm against the data.
bysort territorio time (sesso): gen lfp_mminusfgap = value[2] - value[1]

// Ranks within sex and period: rank_flfp is 1 for the highest value,
// rank_gap is 1 for the smallest gap
bysort sesso time (value): gen rank_flfp = _N - _n + 1
bysort sesso time (lfp_mminusfgap): gen rank_gap = _n

// Bologna, women: the four periods pooled, then one period at a time
local key_years 2004 2008 2009 2013
local bo_women sesso == "femmine" & inlist(time, "2004", "2008", "2009", "2013")
sum value lfp_mminusfgap rank* if territorio == "Bologna" & `bo_women'
foreach yr of local key_years {
	di in yellow "Tabulating LFP stats for year: `yr'"
	sum value lfp_mminusfgap rank* ///
		if territorio == "Bologna" & sesso == "femmine" & time == "`yr'"
}

********************************************************************************
********************************************************************************

*** NURSERY SCHOOL COVERAGE OF 0-2 POPULATION IN BOLOGNA ***

// Import city population 2013: keep the rows with age code 999 (presumably
// the all-ages total -- TODO confirm), add up women and men, and save one
// population figure per municipality to the scratch folder
local name pop_bycity_2013
#delimit ;
import_delimited ${data_raw_ISTAT}/`name'.csv,
	clear delimit(",") rowrange(2:) varnames(2)
;
#delimit cr
keep if etã == 999
gen pop = totalefemmine + totalemaschi
keep codicecomune nomecomune pop
save ${data_scratch}/`name', replace

// Import nursery school data and keep a copy in the scratch folder
local name nurseryschools_bycityyear
import_delimited ${data_raw_ISTAT}/`name'.csv, clear delimit("|") varnames(1)
save ${data_scratch}/`name', replace
use ${data_scratch}/`name'

// Restrict sample to authorized nursery places per 100 children aged 0-2, all
// management types and providers, in territories whose code starts with a
// digit; then verify that code and period identify the rows
#delimit ;
keep if	tipodato == "posti autorizzati per 100 bambini di 0-2 anni" &
		tipodiserviziosocioeducativo == "asilo nido" &
		tipodigestione == "tutte le voci" &
		settoredeltitolare == "totale" &
		regexm(ïitter107, "^([0-9])")	// Drop provinces
;
#delimit cr
gisid ïitter107 time

// Create rank in service coverage (1 = highest value within period).
// NOTE(review): missing values sort last and would therefore take the top
// ranks -- confirm that value has no missing entries.
bysort time (value): gen rank = _N - _n + 1

// Merge w/ total 2013 population. Unmatched rows from either side are kept,
// so pop can be missing for municipalities absent from the population file.
rename ïitter107 codicecomune
destring codicecomune, replace
merge m:1 codicecomune using ${data_scratch}/pop_bycity_2013, nogen

// Separate stats by different classes of municipal population size.
// A missing pop compares as larger than any number, so the size flags
// explicitly require a non-missing population; otherwise municipalities
// without a population figure would be treated as large cities.
sum value rank* if nomecomune == "Bologna" & time == 2013
sum value if time == 2013, detail
foreach str in 15 50 100 {
	gen city`str'k = pop >= `str'000 & !missing(pop)
	bysort time city`str'k (value): gen rank_`str'k = _N - _n + 1
	sum rank_`str'k if nomecomune == "Bologna" & time == 2013 & city`str'k
	sum value if time == 2013 & city`str'k, detail
}

********************************************************************************
********************************************************************************

*** SHARE OF FEMALE CITY COUNCILLORS ***

// Import data
local name storico_amministratori_comuni31122012
import_delimited ${data_raw_Interno}/`name'.csv, clear

// Data cleaning: female indicator from the sesso code
gen female = sesso == "F"

// Share of women (mean of the female indicator) among office holders whose
// title starts with "Consigliere" or with "Assessore", in four geographies:
// the city of Bologna, region code 8, region codes up to 8, and every record
// with a non-missing region code. Each list stores its conditions as
// compound-quoted strings so that the loops below can iterate over them.
// The last condition spells out codice_regione in full, like the other two,
// instead of relying on variable-name abbreviation.
#delimit ;
local offices
	`"`"regexm(descrizione_carica, "^Consigliere")"'
	  `"regexm(descrizione_carica, "^Assessore")"'"'
;
local areas
	`"`"descrizione_comune == "BOLOGNA""'
	  `"codice_regione == 8"'
	  `"codice_regione <= 8"'
	  `"!missing(codice_regione)"'"'
;
#delimit cr
foreach o of local offices {
	foreach a of local areas {
		di in yellow `"Office: `o' geography: `a'"'
		sum female if `o' & `a'
	}
}
clear

********************************************************************************
********************************************************************************

*** TIME BETWEEN MARRIAGE AND APPEARANCE OF FIRST KID IN DATA ***

// Load the observations flagged by s_all
use ${analysis_clean} if s_all

// Mark observations where the voter is married, was not married in the
// previous observation, and has an hh_position code of 3 or less
by ID_voter (year): gen tmp_newlywed = ///
	_n != 1 & married & !married[_n-1] & hh_position <= 3

// Year of marriage = latest year in which the voter is marked; voters who
// are never marked get 0 from the max, which is reset to missing
egen year_married = max(year * tmp_newlywed), by(ID_voter)
replace year_married = . if year_married == 0

// Years between marriage and first-born, and number of observations with a
// marriage year but no first-born year
gen diff = yearfirstborn - year_married
sum diff, detail
count if !missing(year_married) & missing(yearfirstborn)
clear